#!/bin/bash
#
# Runs scripts/merged_jsonl.sh on the "resampled" directory of one dclm part.
#
# Usage: <this script> INDEX
#   INDEX  zero-based decimal position into the part-suffix list `arr`
#          below (0 selects "aa", 29 selects "bd").
#
# Output:
#   stdout  one line "<input_dir> <output_file>", then "FINISHED" once the
#           helper has succeeded.
#   stderr  diagnostics on failure.
#
# Exit status:
#   0  helper succeeded
#   1  could not cd into the project root
#   2  INDEX missing, not a decimal integer, or out of range
#   *  otherwise, the exit status of merged_jsonl.sh
#
# NOTE(review): what merged_jsonl.sh does with its two arguments is not
# visible from this script; the output file name is passed as a relative
# path, so the cd into the project root is kept in case the helper resolves
# it against the working directory -- confirm against the helper.

set -uo pipefail

readonly project_root=/share/projset/dsir7
readonly -a arr=("aa" "ab" "ac" "ad" "ae" "af" "ag" "ah" "ai" "aj" "ak" "al" "am" "an" "ao" "ap" "aq" "ar" "as" "at" "au" "av" "aw" "ax" "ay" "az" "ba" "bb" "bc" "bd")

if (( $# < 1 )); then
  printf 'Usage: %s INDEX  (0..%d)\n' "${0##*/}" "$(( ${#arr[@]} - 1 ))" >&2
  exit 2
fi

index=$1
if ! [[ "$index" =~ ^[0-9]+$ ]]; then
  printf 'error: INDEX must be a non-negative decimal integer, got: %s\n' "$index" >&2
  exit 2
fi

# Force base 10 so a zero-padded index such as "08" is not parsed as octal.
index=$(( 10#$index ))

# The lower bound guards against very long digit strings wrapping negative.
if (( index < 0 || index >= ${#arr[@]} )); then
  printf 'error: INDEX %s out of range (0..%d)\n' "$1" "$(( ${#arr[@]} - 1 ))" >&2
  exit 2
fi

cd "$project_root" || {
  printf 'error: cannot cd to %s\n' "$project_root" >&2
  exit 1
}

part=${arr[index]}
input_dir=${project_root}/indexes/qwen_tokenizer_3_300000/dclm-part${part}_piqa+hellaswag_natural_text_0.0_0/resampled
output_file=dclm-part${part}_piqa+hellaswag_natural_text.jsonl

printf '%s %s\n' "$input_dir" "$output_file"

# Propagate the helper's failure instead of reporting FINISHED regardless.
bash "${project_root}/scripts/merged_jsonl.sh" "$input_dir" "$output_file" || {
  status=$?
  printf 'error: merged_jsonl.sh failed with status %d\n' "$status" >&2
  exit "$status"
}

echo "FINISHED"